/Project |-Database/ |-Binaries/ |-Figures/ |-natlantic |_ beakerBanter_natlantic.Rmd |_ natlantic_banter_data.rds |_ natlantic_banter_data_ici.rds |_ natlantic_banter_ec_model_t1e4s3_t1e4s3.rds |_ natlantic_banter_model_ici_t1e4s3_t1e5s3.rds |_ natlantic_banter_model_alt_t1e4s5_t1e4s5.rds

PAMpal Data Processing

Start by loading the required packages

library("easypackages")
## Warning: package 'easypackages' was built under R version 4.2.3
libraries("PAMpal", "banter", "rfPermute", "readxl", "stringr", "dplyr", "kableExtra", "magick", "magrittr", "here")
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'banter' was built under R version 4.2.3
## Warning: package 'readxl' was built under R version 4.2.3
## Warning: package 'magick' was built under R version 4.2.3
here()
## [1] "C:/Users/shannon.rankin/Documents/GitHub/BANTER_BeakedWhales"
  1. Set up our PPS (PAMPal Settings Object) for the NAtlantic dataset
pps <- PAMpalSettings(db='Database/', 
                      binaries = 'Binaries/',
                      sr_hz='auto', 
                      winLen_sec=.0025, 
                      filterfrom_khz=10, 
                      filterto_khz=NULL)
  1. Process data and save to file to eliminate repeated processing.

If this is the initial processing, ensure you have set ‘freshRun = TRUE’ at top of this document to process and save data. This will take some time to run.

data <- processPgDetections(pps, mode='db', id='natlantic_bw')
saveRDS(data, 'natlantic_study.rds')
# Double check warning messages
print(getWarnings(data)$message)

If you have already run the processing code, ensure you have set ‘freshRun = FALSE’ at top of this document to read in the existing .rds file for downstream processing.

  1. Assign species identity according to original PAMguard labels, then relabel for consistency across projects.

    Final species ID for beaked whale detections was provided by A. DeAngelis as an excel spreadsheet. The following code chunk pulls out the required information (event, species) in the format required for PAMpal.

HB1603 <- read_excel("HB1603_BWs.xlsx", sheet = "Alldata")#read in data
HB1603 <- filter(HB1603, !eventType == "BRAN")
#create formatted event/species dataframe 
speciesID <- HB1603[c("PGId", "Final Species Classification", "Database")] %>% 
  mutate("event" = paste(Database, PGId, sep=".OE")) %>% 
  rename("species" = "Final Species Classification") %>% 
  mutate("species" = str_replace_all(species, "'", ""),
         "species" = str_replace(species, "Gervais/Trues", "BW")) %>% 
  select(all_of(c("event", "species")))

#Assign species ID to data
data <- setSpecies(data, method='manual', value = speciesID)
#Note: 156 detections are not given species IDs and will be filtered out at a later step.
  1. Filter out unwanted data: Subset and rename species for BANTER model
goodSpecies<- c(c("Trues", "Cuviers", "Sowerbys", "Gervais"))
data <- filter(data, species %in% goodSpecies)
  1. Calculate Inter-Click Interval (ICI).
data <- calculateICI(data, time='peakTime')
  1. Add GPS data: Add GPS data from PAMGuard table (2 hour threshold), then filter out data without GPS
data <- addGps(data, thresh = 7200)#11 events over the threshold
getWarnings(data)
  1. Add Environmental Data
#SST
data <- matchEnvData(data, nc='jplMURSST41mday', var='sst')
#Seafloor Depth
data <- matchEnvData(data, nc='usgsCeSS111', var='topo')
  1. Export data for BANTER (and drop species codes that will not be used for training). We will create two datasets: one with ICI and one without ICI, and save these for import into BANTER.
banter_data <- export_banter(data, dropSpecies = 'unid', 
                         dropVars = c('All_ici', 'topo', 'sst', 'topo_mean', 'sst_mean', 
                                      'Latitude', 'Longitude', 'gpsUncertainty'), training=TRUE)
saveRDS(banter_data, file='natlantic_banter_data.rds')

banter_data_ici <- export_banter(data, dropSpecies = 'unid', 
                                 dropVars = c('topo', 'sst', 'topo_mean', 'sst_mean', 
                                              'Latitude', 'Longitude', 'gpsUncertainty'),training=TRUE)
saveRDS(banter_data_ici, file='natlantic_banter_data_ici.rds')

banter_data_env <- export_banter(data, dropSpecies = c('unid'), 
                                 dropVars = c('Latitude', 'Longitude', 'gpsUncertainty'), training=TRUE)
saveRDS(banter_data_env, file='natlantic_banter_data_env.rds')

#save update of Acoustic Study
saveRDS(data, 'natlantic_study.rds')

Build a BANTER Classification Model

EC (only) Model

Initialize, Run & Evaluate Detector Model (stage 1).

banter_model_ec <- initBanterModel(banter_data$events)
banter_model_ec <- addBanterDetector(banter_model_ec, banter_data$detectors, ntree=1e4, sampsize=3, importance = TRUE)

plotDetectorTrace(banter_model_ec, detector = paste0('Click_Detector_', 3:4))
plotDetectorTrace(banter_model_ec, detector = paste0('Click_Detector_', 5:6))
summary(banter_model_ec)

Run BANTER Event Model (stage 2)

banter_model_ec <- runBanterModel(banter_model_ec, ntree=1e4, sampsize=3)
summary(banter_model_ec)

Once a stable model is identified, save model with tree/sampsize info in the filename.

saveRDS(banter_model_ec, 'natlantic_banter_ec_model_t1e4s3_t1e4s3.rds')

ICI Model

Initialize, Run & Evaluate Detector Model (stage 1).

banter_model_ici <- initBanterModel(banter_data_ici$events)
banter_model_ici <- addBanterDetector(banter_model_ici, banter_data_ici$detectors, ntree=1e4, sampsize=3, importance = TRUE)

plotDetectorTrace(banter_model_ici, detector = paste0('Click_Detector_', 3:4))
plotDetectorTrace(banter_model_ici, detector = paste0('Click_Detector_', 5:6))
summary(banter_model_ici)

Run BANTER Event Model (stage 2)

banter_model_ici <- runBanterModel(banter_model_ici, ntree=1e5, sampsize=3)
summary(banter_model_ici)

Once a stable model is identified, save model with tree/sampsize info in the filename.

saveRDS(banter_model_ici, 'natlantic_banter_model_ici_t1e4s3_t1e5s3.rds')

ENV Model

Initialize, Run & Evaluate Detector Model (stage 1).

banter_model_env <- initBanterModel(banter_data_env$events)
banter_model_env <- addBanterDetector(banter_model_env, banter_data_env$detectors, ntree=1e5, sampsize=3, importance = TRUE)

plotDetectorTrace(banter_model_env, detector = paste0('Click_Detector_', 3:4))
plotDetectorTrace(banter_model_env, detector = paste0('Click_Detector_', 5:6))
summary(banter_model_env)

Run Event Model (stage 2)

banter_model_env <- runBanterModel(banter_model_env, ntree=1e5, sampsize=3)
summary(banter_model_env)

Once a stable model is identified, save model with tree/sampsize info in the filename.

saveRDS(banter_model_env, 'natlantic_banter_model_env_t1e5s3_t1e5s3.rds')

Alternative Model (EC_ICI_ALT) with larger samp size

Initialize, Run & Evaluate Detector Model (stage 1).

banter_model_alt <- initBanterModel(banter_data_ici$events)
banter_model_alt <- addBanterDetector(banter_model_alt, banter_data_ici$detectors, ntree=1e4, sampsize=5, importance = TRUE)

plotDetectorTrace(banter_model_alt, detector = paste0('Click_Detector_', 3:4))
plotDetectorTrace(banter_model_alt, detector = paste0('Click_Detector_', 5:6))
summary(banter_model_alt)

Run BANTER Event Model (stage 2)

banter_model_alt <- runBanterModel(banter_model_alt, ntree=1e5, sampsize=5)
summary(banter_model_alt)

Once a stable model is identified, save model with tree/sampsize info in the filename.

saveRDS(banter_model_alt, 'natlantic_banter_model_alt_t1e4s5_t1e5s5.rds')

BANTER Analytics

There are a number of visualizations/data products that allow us to visualize our BANTER classifier; most use the rfPermute package (see BANTER Guidelines for more information)

First, identify the model you would like to examine (comment out the model you do not want to examine).

model_ec <- banter_model_ec
modelname_ec <- "banter_model_ec"

model_ici <- banter_model_ici
modelname_ici <- "banter_model_ici"

model_env <- banter_model_env
modelname_env <- "banter_model_env"

model_alt <- banter_model_alt
modelname_alt <- "banter_model_alt"

Extract the Random Forest model object from our BANTER model for analysis.

banter_model_ec_RF <- getBanterModel(model_ec)
banter_model_ici_RF <- getBanterModel(model_ici)
banter_model_env_RF <- getBanterModel(model_env)
banter_model_alt_RF <- getBanterModel(model_alt)

Class Priors (Expected Error Rate)

natlantic_ec_priors <- classPriors(banter_model_ec_RF, NULL)[,1]
natlantic_ici_priors <- classPriors(banter_model_ici_RF, NULL)[,1]
natlantic_env_priors <- classPriors(banter_model_env_RF, NULL)[,1]
natlantic_alt_priors <- classPriors(banter_model_alt_RF, NULL)[,1]

Confusion Matrix (simple model)

natlantic_ec_confuseMatrix <- rfPermute::confusionMatrix(banter_model_ec_RF)
natlantic_ec_confuseMatrix <- cbind(natlantic_ec_confuseMatrix, priors = natlantic_ec_priors)

natlantic_ec_confuseMatrix <- kable(natlantic_ec_confuseMatrix, align = "c", digits = c(0,0,0,0,2,2,2))%>%
  kable_classic()%>%
  column_spec(5, border_right = TRUE)%>%
  row_spec(0, bold = TRUE)%>%
  row_spec(3,hline_after = TRUE)%>%
  row_spec(5, bold = TRUE)%>%
  save_kable('../manuscript/manuscript_files/natlantic_ec_confuseMatrix.png', zoom = 9)

natlantic_ici_confuseMatrix <- rfPermute::confusionMatrix(banter_model_ici_RF)
natlantic_ici_confuseMatrix <- cbind(natlantic_ici_confuseMatrix, priors = natlantic_ici_priors)

natlantic_ici_confuseMatrix <- kable(natlantic_ici_confuseMatrix, align = "c", digits = c(0,0,0,0,2,2,2))%>%
  kable_classic()%>%
  column_spec(5, border_right = TRUE)%>%
  row_spec(0, bold = TRUE)%>%
  row_spec(3,hline_after = TRUE)%>%
  row_spec(5, bold = TRUE)%>%
  save_kable('../manuscript/manuscript_files/natlantic_ici_confuseMatrix.png', zoom = 9)

natlantic_env_confuseMatrix <- rfPermute::confusionMatrix(banter_model_env_RF)
natlantic_env_confuseMatrix <- cbind(natlantic_env_confuseMatrix, priors = natlantic_env_priors)

natlantic_env_confuseMatrix <- kable(natlantic_env_confuseMatrix, align = "c", digits = c(0,0,0,0,2,2,2))%>%
  kable_classic()%>%
  column_spec(5, border_right = TRUE)%>%
  row_spec(0, bold = TRUE)%>%
  row_spec(3,hline_after = TRUE)%>%
  row_spec(5, bold = TRUE)%>%
  save_kable('../manuscript/manuscript_files/natlantic_env_confuseMatrix.png', zoom = 9)

natlantic_alt_confuseMatrix <- rfPermute::confusionMatrix(banter_model_alt_RF)
natlantic_alt_confuseMatrix <- cbind(natlantic_alt_confuseMatrix, priors = natlantic_alt_priors)

natlantic_alt_confuseMatrix <- kable(natlantic_alt_confuseMatrix, align = "c", digits = c(0,0,0,0,2,2,2))%>%
  kable_classic()%>%
  column_spec(4, border_right = TRUE)%>%
  row_spec(0, bold = TRUE)%>%
  row_spec(3,hline_after = TRUE)%>%
  row_spec(4, bold = TRUE)%>%
  save_kable('../manuscript/manuscript_files/natlantic_alt_confuseMatrix.png', zoom = 9)

BANTER Model N. Atlantic EC Confusion Matrix

BANTER Model N. Atlantic ICI Confusion Matrix

BANTER Model N. Atlantic ENV Confusion Matrix

BANTER Model N. Atlantic Alt Confusion Matrix

Proximity Plot

png(('../manuscript/manuscript_files/natlantic_ec_proximity.png'), width = 20, height = 20, units = 'cm', res = 300)
natlantic_ec_proximityPlot <- plotProximity(banter_model_ec_RF)
dev.off()
natlantic_ec_proximityPlot <- plotProximity(banter_model_ec_RF)

png(('../manuscript/manuscript_files/natlantic_ici_proximity.png'), width = 20, height = 20, units = 'cm', res = 300)
ici_natlantic_proximityPlot <- plotProximity(banter_model_ici_RF)
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
dev.off()
ici_natlantic_proximityPlot <- plotProximity(banter_model_ici_RF)
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

png(('../manuscript/manuscript_files/natlantic_env_proximity.png'), width = 20, height = 20, units = 'cm', res = 300)
env_natlantic_proximityPlot <- plotProximity(banter_model_env_RF)
dev.off()
env_natlantic_proximityPlot <- plotProximity(banter_model_env_RF)

png(('../manuscript/manuscript_files/natlantic_alt_proximity.png'), width = 20, height = 20, units = 'cm', res = 300)
alt_natlantic_proximityPlot <- plotProximity(banter_model_alt_RF)
dev.off()
alt_natlantic_proximityPlot <- plotProximity(banter_model_alt_RF)

Importance Heatmap

png(('../manuscript/manuscript_files/natlantic_ec_importance.png'), width = 30, height = 25, units = 'cm', res = 300)
plotImportance(banter_model_ec_RF, plot.type="heatmap", n=10)
dev.off()
natlantic_ec_importance <- plotImportance(banter_model_ec_RF, plot.type="heatmap", n=10)

png(('../manuscript/manuscript_files/natlantic_ici_importance.png'), width = 30, height = 25, units = 'cm', res = 300)
plotImportance(banter_model_ici_RF, plot.type="heatmap", n=10)
dev.off()
ici_natlantic_importance <- plotImportance(banter_model_ici_RF, plot.type="heatmap", n=10)

png(('../manuscript/manuscript_files/natlantic_env_importance.png'), width = 30, height = 25, units = 'cm', res = 300)
plotImportance(banter_model_env_RF, plot.type="heatmap", n=10)
dev.off()
env_natlantic_importance <- plotImportance(banter_model_env_RF, plot.type="heatmap", n=10)

png(('../manuscript/manuscript_files/natlantic_alt_importance.png'), width = 30, height = 25, units = 'cm', res = 300)
plotImportance(banter_model_alt_RF, plot.type="heatmap", n=10)
dev.off()
alt_natlantic_importance <- plotImportance(banter_model_alt_RF, plot.type="heatmap", n=10)

PlotVotes

png(('../manuscript/manuscript_files/natlantic_ec_votes.png'), width = 20, height = 20, units = 'cm',  res = 300)
plotVotes(banter_model_ec_RF)
dev.off()
natlantic_ec_votes <- plotVotes(banter_model_ec_RF)

png(('../manuscript/manuscript_files/natlantic_ici_votes.png'), width = 20, height = 20, units = 'cm',  res = 300)
plotVotes(banter_model_ici_RF)
dev.off()
ici_natlantic_votes <- plotVotes(banter_model_ici_RF)

png(('../manuscript/manuscript_files/natlantic_env_votes.png'), width = 20, height = 20, units = 'cm',  res = 300)
plotVotes(banter_model_env_RF)
dev.off()
env_natlantic_votes <- plotVotes(banter_model_env_RF)

png(('../manuscript/manuscript_files/natlantic_alt_votes.png'), width = 20, height = 20, units = 'cm',  res = 300)
plotVotes(banter_model_alt_RF)
dev.off()
alt_natlantic_votes <- plotVotes(banter_model_alt_RF)

Plot Predicted Probabilities

plotPredictedProbs(banter_model_ec_RF, bins=30, plot=TRUE)

plotPredictedProbs(banter_model_ici_RF, bins=30, plot=TRUE)

plotPredictedProbs(banter_model_env_RF, bins=30, plot=TRUE)

plotPredictedProbs(banter_model_alt_RF, bins=30, plot=TRUE)

Create Figure for Publication

confuse <- magick::image_read(here('manuscript', 'manuscript_files', 'natlantic_ec_confuseMatrix.png'))%>%
  image_border(color="#ffffff", geometry = "50x130")%>%
  image_annotate("a) Confusion Matrix", size=300, color = "black")
vote <- magick::image_read(here('manuscript', 'manuscript_files', 'natlantic_ec_votes.png'))%>%
  image_border(color="#ffffff", geometry = "270x130")%>%
  image_annotate("d) Vote Plot", size=300, color = "black")
prox <- magick::image_read(here('manuscript', 'manuscript_files', 'natlantic_ec_proximity.png'))%>%
  image_border(color="#ffffff", geometry = "270x130")%>%
  image_annotate("b) Proximity Plot", size=300, color = "black")
heat <- magick::image_read(here('manuscript', 'manuscript_files', 'natlantic_ec_importance.png'))%>%
  image_border(color="#ffffff", geometry = "270x130")%>%
  image_scale("3300")%>%
  image_annotate("d) Importance Heat Map", size=300, color = "black")
  
natlantic_ec_Figure <-image_append(c(prox, heat, vote))
natlantic_ec_Figure<- image_append(c(confuse, natlantic_ec_Figure), stack=TRUE)
image_write(natlantic_ec_Figure, path = here('manuscript', 'manuscript_files','natlantic_ec_Figure.png'), format ='png')
print(natlantic_ec_Figure, info=FALSE)

confuse <- magick::image_read(here('manuscript', 'manuscript_files', 'natlantic_ici_confuseMatrix.png'))%>%
  image_border(color="#ffffff", geometry = "50x130")%>%
  image_annotate("a) Confusion Matrix", size=300, color = "black")
vote <- magick::image_read(here('manuscript', 'manuscript_files', 'natlantic_ici_votes.png'))%>%
  image_border(color="#ffffff", geometry = "270x130")%>%
  image_annotate("d) Vote Plot", size=300, color = "black")
prox <- magick::image_read(here('manuscript', 'manuscript_files', 'natlantic_ici_proximity.png'))%>%
  image_border(color="#ffffff", geometry = "270x130")%>%
  image_annotate("b) Proximity Plot", size=300, color = "black")
heat <- magick::image_read(here('manuscript', 'manuscript_files', 'natlantic_ici_importance.png'))%>%
  image_border(color="#ffffff", geometry = "270x130")%>%
  image_scale("3300")%>%
  image_annotate("d) Importance Heat Map", size=300, color = "black")
  
natlantic_ici_Figure <-image_append(c(prox, heat, vote))
natlantic_ici_Figure<- image_append(c(confuse, natlantic_ici_Figure), stack=TRUE)
image_write(natlantic_ici_Figure, path = here('manuscript', 'manuscript_files','natlantic_ici_Figure.png'), format ='png')
print(natlantic_ici_Figure, info=FALSE)

confuse <- magick::image_read(here('manuscript', 'manuscript_files', 'natlantic_env_confuseMatrix.png'))%>%
  image_border(color="#ffffff", geometry = "50x130")%>%
  image_annotate("a) Confusion Matrix", size=300, color = "black")
vote <- magick::image_read(here('manuscript', 'manuscript_files', 'natlantic_env_votes.png'))%>%
  image_border(color="#ffffff", geometry = "270x130")%>%
  image_annotate("d) Vote Plot", size=300, color = "black")
prox <- magick::image_read(here('manuscript', 'manuscript_files', 'natlantic_env_proximity.png'))%>%
  image_border(color="#ffffff", geometry = "270x130")%>%
  image_annotate("b) Proximity Plot", size=300, color = "black")
heat <- magick::image_read(here('manuscript', 'manuscript_files', 'natlantic_env_importance.png'))%>%
  image_border(color="#ffffff", geometry = "270x130")%>%
  image_scale("3300")%>%
  image_annotate("d) Importance Heat Map", size=300, color = "black")
  
natlantic_env_Figure <-image_append(c(prox, heat, vote))
natlantic_env_Figure<- image_append(c(confuse, natlantic_env_Figure), stack=TRUE)
image_write(natlantic_env_Figure, path = here('manuscript', 'manuscript_files','natlantic_env_Figure.png'), format ='png')
print(natlantic_env_Figure, info=FALSE)

confuse <- magick::image_read(here('manuscript', 'manuscript_files', 'natlantic_alt_confuseMatrix.png'))%>%
  image_border(color="#ffffff", geometry = "50x130")%>%
  image_annotate("a) Confusion Matrix", size=300, color = "black")
vote <- magick::image_read(here('manuscript', 'manuscript_files', 'natlantic_alt_votes.png'))%>%
  image_border(color="#ffffff", geometry = "270x130")%>%
  image_annotate("d) Vote Plot", size=300, color = "black")
prox <- magick::image_read(here('manuscript', 'manuscript_files', 'natlantic_alt_proximity.png'))%>%
  image_border(color="#ffffff", geometry = "270x130")%>%
  image_annotate("b) Proximity Plot", size=300, color = "black")
heat <- magick::image_read(here('manuscript', 'manuscript_files', 'natlantic_alt_importance.png'))%>%
  image_border(color="#ffffff", geometry = "270x130")%>%
  image_scale("3300")%>%
  image_annotate("d) Importance Heat Map", size=300, color = "black")
  
natlantic_alt_Figure <-image_append(c(prox, heat, vote))
natlantic_alt_Figure<- image_append(c(confuse, natlantic_alt_Figure), stack=TRUE)
image_write(natlantic_alt_Figure, path = here('manuscript', 'manuscript_files','natlantic_alt_Figure.png'), format ='png')
print(natlantic_alt_Figure, info=FALSE)